---
title: "Identifying Price Informativeness"
author: "Eduardo Davila^[Yale] & Cecilia Parlatore^[NYU Stern]"
date: "`r Sys.Date()`"
output:
  html_document: default
  pdf_document: default
---

```{r}

library(here); library(tidyverse); library(lubridate); library(data.table); library(zoo); library(foreach); library(doParallel)
# Make the project root (as located by here::here()) the working directory so
# that the relative paths used below resolve from there. The root is printed
# for the log and the temporary variable is removed afterwards.
project_root <- here::here()
print(project_root)
setwd(project_root)
rm(project_root)

# Project helper script; expected to define fn_sp500_dummy(), which the
# cleaning function below calls.
source("functions/fn_sp500_dummy.R")

# Raw inputs. Later code reads the objects df_fred, sp500, crsp_m and
# crsp_msf_div, which are presumably created by these files -- confirm.
load("input/raw_fred.RData")
load("input/raw_sp500.RData")
load("input/raw_crsp.RData")

```

# Building CRSP

- Note that the CRSP data is monthly, while the Compustat data is quarterly/annual. Variables are in millions of dollars.
- shrout is in thousands of shares.
- hexcd doesn't change (header exchange code): Exchange Code - Header displays the Exchange Code on which a security was last listed.
- Combines permcos with multiple permnos
- shrout: measured in thousands of shares
- vol: sum of the trading volumes during the month, reported in units of 100 shares (not adjusted for splits, etc.)
<!-- Example: Berkshire Hathaway, permnos 17778 and 83443, permco 540 -->

```{r}

# Clean the monthly CRSP file and reduce it to one security per firm-month.
#
# Arguments:
#   crsp     - monthly CRSP data. Columns read here: hexcd, exchcd, date, prc,
#              dlprc, ret, dlret, shrout, permno, permco, cfacpr, vol, siccd,
#              vwretd.
#   crsp_div - dividend records. Columns read here: mthcaldt, permno, permco,
#              dividend.
#
# Returns the data frame produced by fn_sp500_dummy(), applied to a table with
# columns permno, date, year, month, mcap, adjprc, turnover, hexcd, exchcd,
# siccd, sic1, ret, vwretd, shrout, dividend and annual_dividend, sorted by
# permno, year, month.
#
# NOTE: `sp500` is not an argument. It is looked up in the enclosing (global)
# environment when fn_sp500_dummy() is called, so it must exist before this
# function runs (presumably loaded from input/raw_sp500.RData -- confirm).
fn_clean_crsp <- function(crsp, crsp_div){

  # Total dividend per security and date; a missing dividend counts as zero.
  d <- crsp_div %>%
    mutate(date     = date(mthcaldt),
           dividend = coalesce(dividend, 0)) %>%
    group_by(date, permno, permco) %>%
    summarize(dividend = sum(dividend), .groups = "drop")

  output <- crsp %>%
    # Keep only exchange codes 1, 2, 3 (labelled NYSE/AMEX/NASDAQ below), both
    # for the header code and for the code recorded on the row.
    filter(hexcd  %in% c(1, 2, 3)) %>%
    filter(exchcd %in% c(1, 2, 3)) %>%
    mutate(date  = ymd(date),
           year  = year(date),
           month = month(date)) %>%
    # Fall back on the delisting price/return where the regular one is missing.
    mutate(prc = ifelse(is.na(prc), dlprc, prc),
           ret = ifelse(is.na(ret), dlret, ret)) %>%
    mutate(mcap_permno = abs(prc)*shrout/1000) %>% # mcap for each permno in millions
    left_join(d, by = c("date", "permno", "permco")) %>%
    mutate(dividend = coalesce(dividend, 0)) %>% # no dividend record => zero
    # Firm-level market cap: add up all permnos that share a permco in a month,
    # then keep only the permno with the largest own market cap as the firm's
    # representative row. arrange() ignores the grouping, so the sort keys are
    # spelled out; the grouping itself survives arrange() and is reused by
    # slice().
    group_by(year, month, permco) %>%
    mutate(mcap = sum(mcap_permno)) %>%
    arrange(year, month, permco, desc(mcap_permno)) %>%
    slice(1) %>%
    ungroup() %>%
    mutate(prc      = abs(prc),       # negative prices means security wasn't traded
           adjprc   = prc/cfacpr,
           # With vol in hundreds of shares and shrout in thousands of shares,
           # vol/shrout*10 is shares traded as a percentage of shares
           # outstanding.
           turnover = vol/shrout*10,
           sic1     = siccd %/% 1000, # leading digit of a four-digit SIC code
           sic1     = factor(sic1),
           hexcd    = factor(hexcd),
           exchcd   = factor(exchcd)) %>%
    mutate(sic1 = fct_recode(sic1,
                             "Agriculture/Mining/Construction" = "0",
                             "Agriculture/Mining/Construction" = "1",
                             "Manufacturing"                   = "2",
                             "Manufacturing"                   = "3",
                             "Transportation/Communications"   = "4",
                             "Wholesale/Retail trade"     = "5",
                             "Finance/Insurance"          = "6",
                             "Services"                   = "7",
                             "Services"                   = "8",
                             "Public administration"      = "9")) %>%
    mutate(hexcd = fct_recode(hexcd,
                              "NYSE"   = "1",
                              "AMEX"   = "2",
                              "NASDAQ" = "3")) %>%
    mutate(exchcd = fct_recode(exchcd,
                               "NYSE"   = "1",
                               "AMEX"   = "2",
                               "NASDAQ" = "3")) %>%
    select(permno, date, year, month, mcap, adjprc, turnover, hexcd, exchcd,
           siccd, sic1, ret, vwretd, shrout, dividend) %>%
    arrange(permno, year, month) %>%
    group_by(permno) %>%
    # Trailing sum over the current and the 11 preceding rows of each permno.
    # This is a row window: it is a 12-month sum only if the permno has no
    # missing months. Sums that are zero up to rounding error are set to
    # exactly 0.
    mutate(annual_dividend = rollsumr(dividend, k = 12, fill = NA),
           annual_dividend = ifelse(abs(annual_dividend) < 1e-6, 0, annual_dividend)) %>%
    ungroup() %>%
    fn_sp500_dummy(sp500)

  output
}

# Build the cleaned firm-month panel from the raw monthly file and the
# dividend records.
crsp <- fn_clean_crsp(crsp = crsp_m, crsp_div = crsp_msf_div)

```
# Calculate market beta
```{r}

# Monthly market excess return and risk-free rate.
# - The market return for a date is the mean of vwretd over the rows flagged
#   sp500 == 1 on that date.
# - gs1 (from df_fred, matched on month and year) is treated by the formula
#   below as an annualised percentage and converted to a monthly compounded
#   rate.
# The summary is ungrouped explicitly so no leftover grouping leaks into the
# later steps (and dplyr does not print a regrouping message).
market_index <- crsp %>%
  filter(sp500 == 1) %>%
  group_by(date, month, year) %>%
  summarise(return = mean(vwretd), .groups = "drop") %>%
  left_join(df_fred, by = c("month", "year"), suffix=c("", "outer")) %>%
  mutate(rfr = (1 + gs1/100)^(1/12)-1,
         excess_market_return = return - rfr) %>%
  select(date, month, year, excess_market_return, rfr)

# Market beta for one rolling window.
#
# x: two-column matrix; column 1 is the stock's excess return, column 2 the
#    market's excess return (one row per period in the window).
#
# Returns cov(stock, market) / var(market), computed on the rows where BOTH
# returns are finite, or NA when fewer than 24 such rows are available.
# Numerator and denominator must come from the same sample: taking the
# covariance on jointly observed rows but the variance on every market
# observation biases the ratio whenever the stock has gaps in the window.
roll_beta <- function(x){
  stock  <- x[, 1]
  market <- x[, 2]
  both_observed <- is.finite(stock) & is.finite(market)

  if (sum(both_observed) >= 24) {
    cov(stock[both_observed], market[both_observed]) / var(market[both_observed])
  } else {
    NA
  }
}

# Attach the market excess return and the risk-free rate to every stock-month
# and form the stock's own excess return.
# The rows are then sorted explicitly: the rolling windows below need each
# permno's rows in chronological order, and the betas are written back by
# position, which is only correct if each permno's rows are contiguous and
# appear in the same order as unique(crsp$permno).
crsp <- crsp %>%
  left_join(market_index, by = c("date", "month", "year")) %>%
  mutate(excess_stock_return = ret - rfr) %>%
  arrange(permno, date)

# Leave one physical core free, but always start at least one worker
# (detectCores() may return NA, and a single-core machine would give 0).
cores <- detectCores(logical = FALSE)
n_workers <- if (is.na(cores)) 1L else max(1L, cores - 1L)
# "PSOCK" is the socket cluster built into the parallel package; "SOCK" is
# handed over to the separate snow package.
cl <- makeCluster(n_workers, type = "PSOCK")
registerDoParallel(cl)

# One task per permno: a right-aligned rolling beta over up to 60 rows, with
# partial windows allowed once 24 rows are available and NA elsewhere. The
# per-permno vectors are concatenated in the order of unique(crsp$permno).
# The cluster is shut down even if a task fails.
betas <- tryCatch(
  foreach(p = unique(crsp$permno), .combine = "c",
          .packages = c('tidyverse', 'zoo')) %dopar% {
    rollapplyr(crsp %>%
                 filter(permno == p) %>%
                 select(excess_stock_return, excess_market_return),
               width = 60, partial = 24, FUN = roll_beta,
               by.column = FALSE, fill = NA)
  },
  finally = stopCluster(cl)
)

# Guard the positional assignment: one beta per row, nothing more or less.
stopifnot(length(betas) == nrow(crsp))
crsp$market_beta <- betas

# Drop the helper columns that were only needed to compute the betas.
crsp <- crsp %>% select(-c(excess_stock_return, excess_market_return, rfr))

save(crsp, file = "intermediate/clean_crsp.RData")
```
